upload ACS data
library(foreign)
# URL of the ACS extract (Stata .dta file) hosted on GitHub
link <- "https://github.com/G7-PUBPOL-543/ACS/raw/main/ManipulatedACS_V2.dta"
# read.dta() comes from the 'foreign' package loaded above
ACSData <- read.dta(file = link)
check variable: race
# List every column name available in the ACS data frame
colnames(ACSData)
## [1] "hhwt" "numprec" "hhtype" "adjust" "region"
## [6] "statefip" "countyfip" "metro" "city" "citypop"
## [11] "ownershp" "mortgage" "acrehous" "mortamt1" "mortamt2"
## [16] "taxincl" "insincl" "propinsr" "owncost" "rentgrs"
## [21] "costelec" "costgas" "costwatr" "costfuel" "hhincome"
## [26] "foodstmp" "valueh" "builtyr2" "unitsstr" "phone"
## [31] "vehicles" "ssmc" "perwt" "famsize" "sex"
## [36] "age" "marst" "birthyr" "marrno" "yrmarr"
## [41] "race" "raced" "hispan" "hispand" "bpl"
## [46] "bpld" "citizen" "yrnatur" "yrimmig" "yrsusa1"
## [51] "language" "speakeng" "tribe" "racamind" "racasian"
## [56] "racblk" "racpacis" "racwht" "racother" "hcovany"
## [61] "hcovpriv" "hinsemp" "hinspur" "hinstri" "hcovpub"
## [66] "hinscaid" "hinscare" "hinsva" "hinsihs" "school"
## [71] "educ" "educd" "schltype" "degfield" "degfieldd"
## [76] "degfield2" "degfield2d" "empstat" "empstatd" "labforce"
## [81] "wkswork1" "wkswork2" "uhrswork" "inctot" "ftotinc"
## [86] "incwage" "incss" "incwelfr" "incinvst" "incretir"
## [91] "incsupp" "incother" "incearn" "poverty" "diffrem"
## [96] "diffphys" "diffmob" "diffcare" "diffsens" "diffeye"
## [101] "diffhear" "pwstate2" "pwcounty" "tranwork" "carpool"
## [106] "riders" "trantime" "departs" "arrives"
# Peek at the first 20 observations of the race variable
head(ACSData[["race"]], n = 20)
## [1] white white
## [3] white black/african american/negro
## [5] white other race, nec
## [7] white white
## [9] white white
## [11] white white
## [13] white two major races
## [15] white white
## [17] black/african american/negro other race, nec
## [19] american indian or alaska native black/african american/negro
## 9 Levels: white ... three or more major races
get frequency table of race: RaceFreq
# Absolute counts per race category.
# 'nothing' does not match any of the race labels printed below, so no
# category is dropped; presumably the intent is to keep missing values in the
# tally as well -- TODO confirm.
RaceCount <- table(ACSData$race, exclude = 'nothing')
print(RaceCount)
##
## white black/african american/negro
## 87159 14408
## american indian or alaska native chinese
## 915 1595
## japanese other asian or pacific islander
## 289 5246
## other race, nec two major races
## 6022 3579
## three or more major races
## 494
# Relative frequencies: each category's share of the total, in percent
RaceProptoplot <- 100 * prop.table(RaceCount)
print(RaceProptoplot)
##
## white black/african american/negro
## 72.8102784 12.0360547
## american indian or alaska native chinese
## 0.7643663 1.3324200
## japanese other asian or pacific islander
## 0.2414228 4.3823669
## other race, nec two major races
## 5.0306164 2.9898001
## three or more major races
## 0.4126743
# Convert the percentage table to a data frame and give its two columns
# readable names: the category ("race") and its share in percent ("pct")
RaceFreq <- setNames(as.data.frame(RaceProptoplot), c("race", "pct"))
print(RaceFreq)
## race pct
## 1 white 72.8102784
## 2 black/african american/negro 12.0360547
## 3 american indian or alaska native 0.7643663
## 4 chinese 1.3324200
## 5 japanese 0.2414228
## 6 other asian or pacific islander 4.3823669
## 7 other race, nec 5.0306164
## 8 two major races 2.9898001
## 9 three or more major races 0.4126743
Time to plot!
library(ggplot2)
Order RaceFreq
# Sort the rows from the smallest to the largest percentage
RaceFreq <- RaceFreq[with(RaceFreq, order(pct)), ]
print(RaceFreq)
## race pct
## 5 japanese 0.2414228
## 9 three or more major races 0.4126743
## 3 american indian or alaska native 0.7643663
## 4 chinese 1.3324200
## 8 two major races 2.9898001
## 6 other asian or pacific islander 4.3823669
## 7 other race, nec 5.0306164
## 2 black/african american/negro 12.0360547
## 1 white 72.8102784
# Keep the race categories in ascending-percentage order; used below as the
# explicit ordering of the discrete x axis
RaceOrd <- RaceFreq[["race"]][order(RaceFreq[["pct"]])]
base
# Empty canvas for the race bar chart: race on x, percentage on y,
# light theme as background, and categories placed in the saved order
base <- ggplot(RaceFreq, aes(x = race, y = pct)) +
  theme_light() +
  scale_x_discrete(limits = RaceOrd)
print(base)
plot1: + geom bar
# stat = 'identity' makes the bar height equal to the pct value itself
plot1 <- base +
  geom_bar(stat = 'identity')
print(plot1)
plot2: + title
# All text elements of the race plot collected in one place
TitleRace <- list(
  Title = "Frequency Table of Race in the United States",
  SubTi = NULL,
  XTi = "Race",
  YTi = "Percentage",
  Sou = "Source: 2019 American Community Survey (ACS) Data"
)
# Attach title, axis names and source caption to the bar chart
plot2 <- plot1 +
  labs(
    title = TitleRace[["Title"]],
    subtitle = TitleRace[["SubTi"]],
    x = TitleRace[["XTi"]],
    y = TitleRace[["YTi"]],
    caption = TitleRace[["Sou"]]
  )
print(plot2)
plot3: customize Y axis
library(scales)
# Percentage axis: fixed 0-80 range, hand-picked tick marks, '%' appended
# to every tick label (unit_format() comes from the scales package)
plot3 <- plot2 +
  scale_y_continuous(
    limits = c(0, 80),
    breaks = c(5, 10, 20, 40, 60, 80),
    labels = unit_format(suffix = '%')
  )
print(plot3)
plot4: Title/subtitle positions:
# Centre the title (hjust = 0.5) and right-align the caption (hjust = 1),
# then flip the axes so the long race labels are fully readable
plot4 <- plot3 +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 1)
  ) +
  coord_flip()
print(plot4)
plot5: Annotating the bars:
paste0(round(RaceFreq[["pct"]], digits = 2), '%')
## [1] "0.24%" "0.41%" "0.76%" "1.33%" "2.99%" "4.38%" "5.03%" "12.04%"
## [9] "72.81%"
# Bar annotations: each percentage rounded to two decimals with a '%' sign.
# LABELS is built row by row from RaceFreq, the same data frame the plot
# uses, so label i belongs to bar i.
LABELS <- paste0(round(RaceFreq$pct, 2), '%')
# Place each label at the end of its bar; the negative hjust pushes the text
# slightly past the bar end
plot5 <- plot4 +
  geom_text(aes(y = pct, label = LABELS),
            vjust = 0,
            hjust = -0.1,
            size = 3)
# Render the final plot once (it was previously printed twice in a row)
plot5
I’ll explore Race and Health Insurance Coverage in the United States in this plot. As these two variables are both categorical, I’ll use contingency tables to organize the relationship.
head(ACSData,5)
## hhwt numprec hhtype adjust region statefip countyfip
## 1 58 1 n/a 1.010145 east south central div alabama 0
## 2 64 1 n/a 1.010145 east south central div alabama 0
## 3 54 1 n/a 1.010145 east south central div alabama 0
## 4 75 1 n/a 1.010145 east south central div alabama 0
## 5 26 1 n/a 1.010145 east south central div alabama 3
## metro
## 1 in metropolitan area: central/principal city status indeterminable (mixed)
## 2 in metropolitan area: central/principal city status indeterminable (mixed)
## 3 not in metropolitan area
## 4 in metropolitan area: in central/principal city
## 5 in metropolitan area: central/principal city status indeterminable (mixed)
## city citypop ownershp mortgage acrehous
## 1 not in identifiable city (or size group) 0 n/a n/a n/a
## 2 not in identifiable city (or size group) 0 n/a n/a n/a
## 3 not in identifiable city (or size group) 0 n/a n/a n/a
## 4 not in identifiable city (or size group) 0 n/a n/a n/a
## 5 not in identifiable city (or size group) 0 n/a n/a n/a
## mortamt1 mortamt2 taxincl insincl propinsr owncost rentgrs costelec costgas
## 1 0 0 n/a n/a 0 99999 0 0 0
## 2 0 0 n/a n/a 0 99999 0 0 0
## 3 0 0 n/a n/a 0 99999 0 0 0
## 4 0 0 n/a n/a 0 99999 0 0 0
## 5 0 0 n/a n/a 0 99999 0 0 0
## costwatr costfuel hhincome foodstmp valueh builtyr2 unitsstr phone vehicles
## 1 0 0 9999999 no 9999999 n/a n/a n/a n/a
## 2 0 0 9999999 no 9999999 n/a n/a n/a n/a
## 3 0 0 9999999 no 9999999 n/a n/a n/a n/a
## 4 0 0 9999999 no 9999999 n/a n/a n/a n/a
## 5 0 0 9999999 no 9999999 n/a n/a n/a n/a
## ssmc perwt famsize
## 1 households without a same-sex married couple 58 1 family member present
## 2 households without a same-sex married couple 64 1 family member present
## 3 households without a same-sex married couple 54 1 family member present
## 4 households without a same-sex married couple 75 1 family member present
## 5 households without a same-sex married couple 26 1 family member present
## sex age marst birthyr marrno yrmarr
## 1 male 37 never married/single 1982 not applicable 0
## 2 male 19 never married/single 2000 not applicable 0
## 3 female 87 widowed 1932 married once 1953
## 4 male 26 never married/single 1993 not applicable 0
## 5 male 49 never married/single 1970 not applicable 0
## race raced hispan
## 1 white white not hispanic
## 2 white white not hispanic
## 3 white white not hispanic
## 4 black/african american/negro black/african american/negro not hispanic
## 5 white white not hispanic
## hispand bpl bpld citizen yrnatur yrimmig yrsusa1
## 1 not hispanic indiana indiana n/a n/a 0 0
## 2 not hispanic alabama alabama n/a n/a 0 0
## 3 not hispanic alabama alabama n/a n/a 0 0
## 4 not hispanic north carolina north carolina n/a n/a 0 0
## 5 not hispanic alabama alabama n/a n/a 0 0
## language speakeng tribe racamind racasian
## 1 english yes, speaks only english not applicable or blank no no
## 2 english yes, speaks only english not applicable or blank no no
## 3 english yes, speaks only english not applicable or blank no no
## 4 english yes, speaks only english not applicable or blank no no
## 5 english yes, speaks only english not applicable or blank no no
## racblk racpacis racwht racother hcovany
## 1 no no yes no no health insurance coverage
## 2 no no yes no with health insurance coverage
## 3 no no yes no with health insurance coverage
## 4 yes no no no no health insurance coverage
## 5 no no yes no with health insurance coverage
## hcovpriv
## 1 without private health insurance coverage
## 2 with private health insurance coverage
## 3 with private health insurance coverage
## 4 without private health insurance coverage
## 5 without private health insurance coverage
## hinsemp hinspur
## 1 no insurance through employer/union no insurance purchased directly
## 2 has insurance through employer/union no insurance purchased directly
## 3 no insurance through employer/union has insurance purchased directly
## 4 no insurance through employer/union no insurance purchased directly
## 5 no insurance through employer/union no insurance purchased directly
## hinstri hcovpub
## 1 no insurance through tricare without public health insurance coverage
## 2 no insurance through tricare without public health insurance coverage
## 3 no insurance through tricare with public health insurance coverage
## 4 no insurance through tricare without public health insurance coverage
## 5 no insurance through tricare with public health insurance coverage
## hinscaid hinscare hinsva
## 1 no insurance through medicaid no no insurance through va
## 2 no insurance through medicaid no no insurance through va
## 3 has insurance through medicaid yes no insurance through va
## 4 no insurance through medicaid no no insurance through va
## 5 has insurance through medicaid no no insurance through va
## hinsihs school
## 1 no insurance through indian health service no, not in school
## 2 no insurance through indian health service yes, in school
## 3 no insurance through indian health service no, not in school
## 4 no insurance through indian health service no, not in school
## 5 no insurance through indian health service no, not in school
## educ educd
## 1 1 year of college 1 or more years of college credit, no degree
## 2 1 year of college 1 or more years of college credit, no degree
## 3 grade 12 regular high school diploma
## 4 grade 9 grade 9
## 5 grade 5, 6, 7, or 8 grade 8
## schltype degfield degfieldd degfield2 degfield2d empstat
## 1 not enrolled n/a n/a n/a n/a not in labor force
## 2 public school n/a n/a n/a n/a employed
## 3 not enrolled n/a n/a n/a n/a not in labor force
## 4 not enrolled n/a n/a n/a n/a not in labor force
## 5 not enrolled n/a n/a n/a n/a employed
## empstatd labforce wkswork1 wkswork2 uhrswork
## 1 not in labor force no, not in the labor force 16 14-26 weeks 55
## 2 at work yes, in the labor force 26 14-26 weeks 40
## 3 not in labor force no, not in the labor force 0 n/a n/a
## 4 not in labor force no, not in the labor force 48 48-49 weeks 40
## 5 at work yes, in the labor force 11 1-13 weeks 21
## inctot ftotinc incwage incss incwelfr incinvst incretir incsupp incother
## 1 15800 9999999 15800 0 0 0 0 0 0
## 2 800 9999999 800 0 0 0 0 0 0
## 3 13800 9999999 0 13800 0 0 0 0 0
## 4 23500 9999999 23500 0 0 0 0 0 0
## 5 10700 9999999 1700 0 0 0 0 9000 0
## incearn poverty diffrem diffphys
## 1 15800 0 has cognitive difficulty no ambulatory difficulty
## 2 800 0 no cognitive difficulty no ambulatory difficulty
## 3 0 0 has cognitive difficulty has ambulatory difficulty
## 4 23500 0 no cognitive difficulty no ambulatory difficulty
## 5 1700 81 has cognitive difficulty has ambulatory difficulty
## diffmob diffcare diffsens
## 1 no independent living difficulty no no vision or hearing difficulty
## 2 no independent living difficulty no no vision or hearing difficulty
## 3 has independent living difficulty yes no vision or hearing difficulty
## 4 no independent living difficulty no no vision or hearing difficulty
## 5 has independent living difficulty yes no vision or hearing difficulty
## diffeye diffhear pwstate2 pwcounty tranwork carpool
## 1 no no n/a 0 n/a n/a
## 2 no no alabama 0 auto, truck, or van drives alone
## 3 no no n/a 0 n/a n/a
## 4 no no n/a 0 n/a n/a
## 5 no no alabama 3 auto, truck, or van carpools
## riders trantime departs arrives
## 1 n/a 0 0 0
## 2 drives alone 15 902 919
## 3 n/a 0 0 0
## 4 n/a 0 0 0
## 5 3 15 702 719
# Inspect hcovany: whether a person has any health insurance coverage
head(ACSData[["hcovany"]], n = 20)
## [1] no health insurance coverage with health insurance coverage
## [3] with health insurance coverage no health insurance coverage
## [5] with health insurance coverage with health insurance coverage
## [7] with health insurance coverage with health insurance coverage
## [9] with health insurance coverage with health insurance coverage
## [11] with health insurance coverage with health insurance coverage
## [13] no health insurance coverage with health insurance coverage
## [15] with health insurance coverage with health insurance coverage
## [17] no health insurance coverage with health insurance coverage
## [19] with health insurance coverage with health insurance coverage
## Levels: no health insurance coverage with health insurance coverage
# Cross-tabulate race (rows) against health insurance coverage (columns)
RaceHIC <- table(ACSData$race, ACSData$hcovany)
print(RaceHIC)
##
## no health insurance coverage
## white 7041
## black/african american/negro 1541
## american indian or alaska native 170
## chinese 91
## japanese 19
## other asian or pacific islander 327
## other race, nec 1291
## two major races 284
## three or more major races 33
##
## with health insurance coverage
## white 80118
## black/african american/negro 12867
## american indian or alaska native 745
## chinese 1504
## japanese 270
## other asian or pacific islander 4919
## other race, nec 4731
## two major races 3295
## three or more major races 461
# Column-wise shares: within each coverage status, the fraction belonging to
# each race (margin = 2 normalises by column), rounded to three decimals
library(magrittr)
RaceHIC_mgCol <- round(prop.table(RaceHIC, margin = 2), 3)
print(RaceHIC_mgCol)
##
## no health insurance coverage
## white 0.652
## black/african american/negro 0.143
## american indian or alaska native 0.016
## chinese 0.008
## japanese 0.002
## other asian or pacific islander 0.030
## other race, nec 0.120
## two major races 0.026
## three or more major races 0.003
##
## with health insurance coverage
## white 0.736
## black/african american/negro 0.118
## american indian or alaska native 0.007
## chinese 0.014
## japanese 0.002
## other asian or pacific islander 0.045
## other race, nec 0.043
## two major races 0.030
## three or more major races 0.004
# Long-format data frame of the contingency table: one row per
# race x coverage-status combination with its count
RaceHIC_DF <- setNames(
  as.data.frame(RaceHIC),
  c("race", "HICstatus", "counts")
)
print(RaceHIC_DF)
## race HICstatus counts
## 1 white no health insurance coverage 7041
## 2 black/african american/negro no health insurance coverage 1541
## 3 american indian or alaska native no health insurance coverage 170
## 4 chinese no health insurance coverage 91
## 5 japanese no health insurance coverage 19
## 6 other asian or pacific islander no health insurance coverage 327
## 7 other race, nec no health insurance coverage 1291
## 8 two major races no health insurance coverage 284
## 9 three or more major races no health insurance coverage 33
## 10 white with health insurance coverage 80118
## 11 black/african american/negro with health insurance coverage 12867
## 12 american indian or alaska native with health insurance coverage 745
## 13 chinese with health insurance coverage 1504
## 14 japanese with health insurance coverage 270
## 15 other asian or pacific islander with health insurance coverage 4919
## 16 other race, nec with health insurance coverage 4731
## 17 two major races with health insurance coverage 3295
## 18 three or more major races with health insurance coverage 461
# Append the column-wise shares; both tables are flattened in the same
# row order, so the frequency column can be copied over directly
RaceHIC_DF[["pctCol"]] <- as.data.frame(RaceHIC_mgCol)[["Freq"]]
print(RaceHIC_DF)
## race HICstatus counts
## 1 white no health insurance coverage 7041
## 2 black/african american/negro no health insurance coverage 1541
## 3 american indian or alaska native no health insurance coverage 170
## 4 chinese no health insurance coverage 91
## 5 japanese no health insurance coverage 19
## 6 other asian or pacific islander no health insurance coverage 327
## 7 other race, nec no health insurance coverage 1291
## 8 two major races no health insurance coverage 284
## 9 three or more major races no health insurance coverage 33
## 10 white with health insurance coverage 80118
## 11 black/african american/negro with health insurance coverage 12867
## 12 american indian or alaska native with health insurance coverage 745
## 13 chinese with health insurance coverage 1504
## 14 japanese with health insurance coverage 270
## 15 other asian or pacific islander with health insurance coverage 4919
## 16 other race, nec with health insurance coverage 4731
## 17 two major races with health insurance coverage 3295
## 18 three or more major races with health insurance coverage 461
## pctCol
## 1 0.652
## 2 0.143
## 3 0.016
## 4 0.008
## 5 0.002
## 6 0.030
## 7 0.120
## 8 0.026
## 9 0.003
## 10 0.736
## 11 0.118
## 12 0.007
## 13 0.014
## 14 0.002
## 15 0.045
## 16 0.043
## 17 0.030
## 18 0.004
library(ggplot2)
# Grouped bars: one cluster per coverage status, one bar per race
base1 <- ggplot(RaceHIC_DF, aes(x = HICstatus, y = counts, fill = race))
barGC <- base1 +
  geom_bar(position = 'dodge', stat = "identity")
print(barGC)
# Same chart with each count written next to its bar, rotated 90 degrees;
# the dodge width keeps the text aligned with the dodged bars
print(
  barGC +
    geom_text(aes(label = counts),
              position = position_dodge(width = 0.9),
              angle = 90,
              hjust = -0.5)
)
# Same chart recoloured with the "Paired" brewer palette
print(barGC + scale_fill_brewer(palette = "Paired"))
reorder by race
# Fix the display order of the race categories. The order follows the
# descending "with health insurance coverage" counts of the contingency
# table, so bars and legend entries go from the largest to the smallest group.
RaceHIC_DF$race <- factor(
  RaceHIC_DF$race,
  levels = c("white", "black/african american/negro", "other asian or pacific islander", "other race, nec", "two major races", "chinese", "american indian or alaska native", "three or more major races", "japanese")
)
library(ggplot2)
# Grouped bars: one cluster per coverage status, one bar per race
base1 <- ggplot(data = RaceHIC_DF,
                aes(x = HICstatus,
                    y = counts,
                    fill = race))
barGC <- base1 + geom_bar(stat = "identity",
                          position = 'dodge')
# Count labels, rotated and dodged by the same width as the bars
barGC <- barGC + geom_text(aes(label = counts),
                           position = position_dodge(width = 0.9),
                           angle = 90,
                           hjust = -0.5)
# Sequential palette applied in reversed order (direction = -1).
# The legend title now names the variable mapped to fill; it previously read
# "PRECINCT", which does not describe these data.
barGC <- barGC + scale_fill_brewer(name = "Race",
                                   palette = "BuPu",
                                   direction = -1)
barGC
go from dodge to stack:
# Stacked bars with the count printed in the middle of every segment.
# Label colour per data row: light grey for the two largest groups,
# darker grey for all others.
conditionColor <- ifelse(
  RaceHIC_DF$race %in% c("white", 'black/african american/negro'),
  'grey80',
  'grey50'
)
barStacked <- base1 +
  geom_bar(stat = "identity", position = 'stack') +
  # the text layer carries its own aesthetics (label) and a fixed colour vector
  geom_text(aes(label = counts),
            size = 5,
            fontface = 'bold',
            position = position_stack(vjust = 0.5),
            color = conditionColor)
print(barStacked + scale_fill_brewer(palette = "GnBu", direction = -1))
stacked percent
library(scales)
# Base layer for the 100%-stacked chart: coverage status on x, counts on y,
# one fill colour per race
base2 <- ggplot(data = RaceHIC_DF,
                aes(x = HICstatus, y = counts, fill = race))
# position = "fill" stacks the segments and rescales every bar to the same
# total height. The bars are built on base2 defined right above; previously
# base2 was created but never used and the chart relied on base1 from an
# earlier section.
barStackPct <- base2 + geom_bar(stat = "identity",
                                position = "fill")
# Label every segment with its column share, formatted as a percentage
barStackPct1 <- barStackPct + geom_text(aes(label = percent(pctCol, accuracy = 0.1)),
                                        size = 5,
                                        position = position_fill(vjust = 0.5))
barStackPct1
# Variant with tiny labels, a stretched y axis (fixed 10:1 aspect ratio) and
# tick marks every 10 percentage points
barStackPct2 <- barStackPct + geom_text(aes(label = percent(pctCol, accuracy = 0.1)),
                                        size = 1,
                                        position = position_fill(vjust = 0.5))
barStackPct2 <- barStackPct2 +
  coord_fixed(ratio = 10/1) +
  scale_y_continuous(breaks = seq(0, 1, 0.1))
barStackPct2
The plot cannot show all of the labels. It seems this is the “bad idea” introduced in class, so I will have to represent the table in a different way.
# Heat map of the contingency table: coverage status on x, race on y
# (ordered by its column share), tile colour = column share in percent
base <- ggplot(RaceHIC_DF, aes(x = HICstatus,
                               y = reorder(race, pctCol),
                               fill = pctCol * 100))
heat <- base + geom_tile()
# Colour intensity: white for the lowest share, black for the highest
heat <- heat + scale_fill_gradient(low = "white",
                                   high = "black")
heat <- heat + theme_classic()
heat
# Improve the heat plot.
# Both axis labels are passed by name; the x label was previously given as an
# unnamed argument, so it was not bound to the x axis.
heat <- heat + labs(x = "Health Insurance Coverage Status",
                    y = "Race")
# Horizontal legend without a title placed above the plot, with 1 cm keys
heat <- heat + theme(axis.text.x = element_text(angle = 0,
                                                vjust = 0.6),
                     legend.title = element_blank(),
                     legend.position = "top",
                     legend.direction = "horizontal",
                     legend.key.width = unit(1, "cm"),
                     legend.key.height = unit(1, "cm"))
heat
# Text elements of the heat plot collected in one list
TitleRaceHIC <- list(
  Title = "Heat Plot-Race and Health Insurance Coverage in the U.S.",
  SubTi = NULL,
  XTi = "Health Insurance Coverage Status",
  YTi = "Race",
  Sou = "Source: IPUMS USA"
)
# heat2: heat plot plus title, axis names and source caption
heat2 <- heat +
  labs(
    title = TitleRaceHIC[["Title"]],
    subtitle = TitleRaceHIC[["SubTi"]],
    x = TitleRaceHIC[["XTi"]],
    y = TitleRaceHIC[["YTi"]],
    caption = TitleRaceHIC[["Sou"]]
  )
print(heat2)
# heat3: centred title (hjust = 0.5), right-aligned caption (hjust = 1)
heat3 <- heat2 +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 1)
  )
print(heat3)
The pattern is still not visually obvious. Should I exclude “white”? Or is there some other way to redraw this plot?
# Dot representation of the contingency table: one point per
# race x coverage-status cell, sized by its column share and labelled with
# that share as a percentage, nudged to the right of the point
library(ggplot2)
base3 <- ggplot(RaceHIC_DF, aes(x = HICstatus, y = counts))
tablePlot <- base3 +
  geom_point(aes(size = pctCol * 100)) +
  geom_text(aes(label = percent(pctCol)),
            nudge_x = 0.15,
            size = 3)
print(tablePlot)
# Cleaner look: minimal theme and no size legend
tablePlot <- tablePlot +
  theme_minimal() +
  theme(legend.position = "none")
print(tablePlot)
# Simpler bar chart: counts per race, drawn in a minimal theme
base4 <- ggplot(RaceHIC_DF, aes(x = race, y = counts))
bars <- base4 +
  geom_bar(stat = "identity") +
  theme_minimal()
# One panel per health insurance status
barsFa <- bars + facet_grid(~ HICstatus)
print(barsFa)
# Rotate and shrink the x-axis labels on top of the minimal theme
barsFa <- barsFa +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 7))
print(barsFa)
# Column shares per race, one panel per coverage status, drawn as
# horizontal bars (axes flipped)
base5 <- ggplot(RaceHIC_DF, aes(x = race, y = pctCol))
barsIO <- base5 +
  geom_bar(stat = "identity") +
  facet_grid(~ HICstatus) +
  coord_flip()
print(barsIO)
# Same faceted chart, but with the race categories ordered by pctCol
# via reorder() in the x aesthetic
base5b <- ggplot(RaceHIC_DF, aes(x = reorder(race, pctCol), y = pctCol))
barsIOb <- base5b +
  geom_bar(stat = "identity") +
  facet_grid(~ HICstatus) +
  coord_flip() +
  # smaller, tilted category labels
  theme(axis.text.y = element_text(size = 7, angle = 45))
print(barsIOb)
Nice try. Next step is to: add titles
# Text elements for the faceted race / coverage chart
TitleRaHI <- list(
  Title = " Health Insurance Coverage Status by Race in the United States",
  SubTi = NULL,
  XTi = "Race",
  YTi = "Percentage, Health Insurance Coverage Status",
  Sou = "Source: 2019 American Community Survey (ACS) Data"
)
# Attach title, axis names and source caption
barsIObTi <- barsIOb +
  labs(
    title = TitleRaHI[["Title"]],
    subtitle = TitleRaHI[["SubTi"]],
    x = TitleRaHI[["XTi"]],
    y = TitleRaHI[["YTi"]],
    caption = TitleRaHI[["Sou"]]
  )
print(barsIObTi)
title positions
# Centre the title (hjust = 0.5) and right-align the caption (hjust = 1)
barsIObTi1 <- barsIObTi +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 1)
  )
print(barsIObTi1)
Process ACSData
head(ACSData[["poverty"]], n = 40)
## [1] 0 0 0 0 81 0 0 0 54 0 0 0 0 142 0 0 0 150 0
## [20] 109 0 457 0 0 112 1 0 169 135 0 84 46 0 20 1 0 122 27
## [39] 204 104
ACSData$povertyst
## NULL
# povertyst is TRUE when poverty < 100; povertyst_num is the same flag as
# 1/0 so it can be summed to obtain a poverty rate by state.
# NOTE(review): rows with poverty == 0 are also flagged TRUE. IPUMS appears to
# document POVERTY = 0 as "N/A" rather than as an income level -- confirm
# whether those rows should be excluded before computing rates.
ACSData$povertyst <- ACSData$poverty < 100
ACSData$povertyst_num <- as.numeric(ACSData$povertyst)
head(ACSData$povertyst_num, n = 20)
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 0
head(ACSData,5) #check
## hhwt numprec hhtype adjust region statefip countyfip
## 1 58 1 n/a 1.010145 east south central div alabama 0
## 2 64 1 n/a 1.010145 east south central div alabama 0
## 3 54 1 n/a 1.010145 east south central div alabama 0
## 4 75 1 n/a 1.010145 east south central div alabama 0
## 5 26 1 n/a 1.010145 east south central div alabama 3
## metro
## 1 in metropolitan area: central/principal city status indeterminable (mixed)
## 2 in metropolitan area: central/principal city status indeterminable (mixed)
## 3 not in metropolitan area
## 4 in metropolitan area: in central/principal city
## 5 in metropolitan area: central/principal city status indeterminable (mixed)
## city citypop ownershp mortgage acrehous
## 1 not in identifiable city (or size group) 0 n/a n/a n/a
## 2 not in identifiable city (or size group) 0 n/a n/a n/a
## 3 not in identifiable city (or size group) 0 n/a n/a n/a
## 4 not in identifiable city (or size group) 0 n/a n/a n/a
## 5 not in identifiable city (or size group) 0 n/a n/a n/a
## mortamt1 mortamt2 taxincl insincl propinsr owncost rentgrs costelec costgas
## 1 0 0 n/a n/a 0 99999 0 0 0
## 2 0 0 n/a n/a 0 99999 0 0 0
## 3 0 0 n/a n/a 0 99999 0 0 0
## 4 0 0 n/a n/a 0 99999 0 0 0
## 5 0 0 n/a n/a 0 99999 0 0 0
## costwatr costfuel hhincome foodstmp valueh builtyr2 unitsstr phone vehicles
## 1 0 0 9999999 no 9999999 n/a n/a n/a n/a
## 2 0 0 9999999 no 9999999 n/a n/a n/a n/a
## 3 0 0 9999999 no 9999999 n/a n/a n/a n/a
## 4 0 0 9999999 no 9999999 n/a n/a n/a n/a
## 5 0 0 9999999 no 9999999 n/a n/a n/a n/a
## ssmc perwt famsize
## 1 households without a same-sex married couple 58 1 family member present
## 2 households without a same-sex married couple 64 1 family member present
## 3 households without a same-sex married couple 54 1 family member present
## 4 households without a same-sex married couple 75 1 family member present
## 5 households without a same-sex married couple 26 1 family member present
## sex age marst birthyr marrno yrmarr
## 1 male 37 never married/single 1982 not applicable 0
## 2 male 19 never married/single 2000 not applicable 0
## 3 female 87 widowed 1932 married once 1953
## 4 male 26 never married/single 1993 not applicable 0
## 5 male 49 never married/single 1970 not applicable 0
## race raced hispan
## 1 white white not hispanic
## 2 white white not hispanic
## 3 white white not hispanic
## 4 black/african american/negro black/african american/negro not hispanic
## 5 white white not hispanic
## hispand bpl bpld citizen yrnatur yrimmig yrsusa1
## 1 not hispanic indiana indiana n/a n/a 0 0
## 2 not hispanic alabama alabama n/a n/a 0 0
## 3 not hispanic alabama alabama n/a n/a 0 0
## 4 not hispanic north carolina north carolina n/a n/a 0 0
## 5 not hispanic alabama alabama n/a n/a 0 0
## language speakeng tribe racamind racasian
## 1 english yes, speaks only english not applicable or blank no no
## 2 english yes, speaks only english not applicable or blank no no
## 3 english yes, speaks only english not applicable or blank no no
## 4 english yes, speaks only english not applicable or blank no no
## 5 english yes, speaks only english not applicable or blank no no
## racblk racpacis racwht racother hcovany
## 1 no no yes no no health insurance coverage
## 2 no no yes no with health insurance coverage
## 3 no no yes no with health insurance coverage
## 4 yes no no no no health insurance coverage
## 5 no no yes no with health insurance coverage
## hcovpriv
## 1 without private health insurance coverage
## 2 with private health insurance coverage
## 3 with private health insurance coverage
## 4 without private health insurance coverage
## 5 without private health insurance coverage
## hinsemp hinspur
## 1 no insurance through employer/union no insurance purchased directly
## 2 has insurance through employer/union no insurance purchased directly
## 3 no insurance through employer/union has insurance purchased directly
## 4 no insurance through employer/union no insurance purchased directly
## 5 no insurance through employer/union no insurance purchased directly
## hinstri hcovpub
## 1 no insurance through tricare without public health insurance coverage
## 2 no insurance through tricare without public health insurance coverage
## 3 no insurance through tricare with public health insurance coverage
## 4 no insurance through tricare without public health insurance coverage
## 5 no insurance through tricare with public health insurance coverage
## hinscaid hinscare hinsva
## 1 no insurance through medicaid no no insurance through va
## 2 no insurance through medicaid no no insurance through va
## 3 has insurance through medicaid yes no insurance through va
## 4 no insurance through medicaid no no insurance through va
## 5 has insurance through medicaid no no insurance through va
## hinsihs school
## 1 no insurance through indian health service no, not in school
## 2 no insurance through indian health service yes, in school
## 3 no insurance through indian health service no, not in school
## 4 no insurance through indian health service no, not in school
## 5 no insurance through indian health service no, not in school
## educ educd
## 1 1 year of college 1 or more years of college credit, no degree
## 2 1 year of college 1 or more years of college credit, no degree
## 3 grade 12 regular high school diploma
## 4 grade 9 grade 9
## 5 grade 5, 6, 7, or 8 grade 8
## schltype degfield degfieldd degfield2 degfield2d empstat
## 1 not enrolled n/a n/a n/a n/a not in labor force
## 2 public school n/a n/a n/a n/a employed
## 3 not enrolled n/a n/a n/a n/a not in labor force
## 4 not enrolled n/a n/a n/a n/a not in labor force
## 5 not enrolled n/a n/a n/a n/a employed
## empstatd labforce wkswork1 wkswork2 uhrswork
## 1 not in labor force no, not in the labor force 16 14-26 weeks 55
## 2 at work yes, in the labor force 26 14-26 weeks 40
## 3 not in labor force no, not in the labor force 0 n/a n/a
## 4 not in labor force no, not in the labor force 48 48-49 weeks 40
## 5 at work yes, in the labor force 11 1-13 weeks 21
## inctot ftotinc incwage incss incwelfr incinvst incretir incsupp incother
## 1 15800 9999999 15800 0 0 0 0 0 0
## 2 800 9999999 800 0 0 0 0 0 0
## 3 13800 9999999 0 13800 0 0 0 0 0
## 4 23500 9999999 23500 0 0 0 0 0 0
## 5 10700 9999999 1700 0 0 0 0 9000 0
## incearn poverty diffrem diffphys
## 1 15800 0 has cognitive difficulty no ambulatory difficulty
## 2 800 0 no cognitive difficulty no ambulatory difficulty
## 3 0 0 has cognitive difficulty has ambulatory difficulty
## 4 23500 0 no cognitive difficulty no ambulatory difficulty
## 5 1700 81 has cognitive difficulty has ambulatory difficulty
## diffmob diffcare diffsens
## 1 no independent living difficulty no no vision or hearing difficulty
## 2 no independent living difficulty no no vision or hearing difficulty
## 3 has independent living difficulty yes no vision or hearing difficulty
## 4 no independent living difficulty no no vision or hearing difficulty
## 5 has independent living difficulty yes no vision or hearing difficulty
## diffeye diffhear pwstate2 pwcounty tranwork carpool
## 1 no no n/a 0 n/a n/a
## 2 no no alabama 0 auto, truck, or van drives alone
## 3 no no n/a 0 n/a n/a
## 4 no no n/a 0 n/a n/a
## 5 no no alabama 3 auto, truck, or van carpools
## riders trantime departs arrives povertyst povertyst_num
## 1 n/a 0 0 0 TRUE 1
## 2 drives alone 15 902 919 TRUE 1
## 3 n/a 0 0 0 TRUE 1
## 4 n/a 0 0 0 TRUE 1
## 5 3 15 702 719 TRUE 1
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Two-column working data frame: state and the numeric poverty flag.
# The column names are spelled out so they match what later steps refer to.
ACSData1 <- data.frame(
  ACSData.statefip = ACSData$statefip,
  ACSData.povertyst_num = ACSData$povertyst_num
)
head(ACSData1, n = 5)
## ACSData.statefip ACSData.povertyst_num
## 1 alabama 1
## 2 alabama 1
## 3 alabama 1
## 4 alabama 1
## 5 alabama 1
Generate a “count” variable to count the total number of persons in each state
ACSData1$count
## NULL
# Every row counts as one person; summing this column gives the state total
ACSData1[["count"]] <- 1
head(ACSData1, n = 5)
## ACSData.statefip ACSData.povertyst_num count
## 1 alabama 1 1
## 2 alabama 1 1
## 3 alabama 1 1
## 4 alabama 1 1
## 5 alabama 1 1
calculate
library(dplyr)
# Per-state poverty share: within each state, the sum of the 1/0 poverty flag
# divided by the number of rows; the value is repeated on every row of the state
ACSData2 <- ACSData1 %>%
  group_by(ACSData.statefip) %>%
  mutate(percent = sum(ACSData.povertyst_num) / sum(count))
head(ACSData2, n = 5)
## # A tibble: 5 x 4
## # Groups: ACSData.statefip [1]
## ACSData.statefip ACSData.povertyst_num count percent
## <fct> <dbl> <dbl> <dbl>
## 1 alabama 1 1 0.197
## 2 alabama 1 1 0.197
## 3 alabama 1 1 0.197
## 4 alabama 1 1 0.197
## 5 alabama 1 1 0.197
save the “percent” to new dataframe
# Collapse to one row per state / percent combination, keeping the other
# columns of the first matching row
ACSData3 <- distinct(ACSData2, ACSData.statefip, percent, .keep_all = TRUE)
head(ACSData3, n = 5)
## # A tibble: 5 x 4
## # Groups: ACSData.statefip [5]
## ACSData.statefip ACSData.povertyst_num count percent
## <fct> <dbl> <dbl> <dbl>
## 1 alabama 1 1 0.197
## 2 puerto rico 1 1 0.501
## 3 alaska 1 1 0.0997
## 4 arizona 1 1 0.131
## 5 arkansas 1 1 0.174
# Duplicate the state column under the name NAME, the key used later when
# joining with the map data.
ACSData3[["NAME"]] <- ACSData3[["ACSData.statefip"]]
head(ACSData3, n = 5)
## # A tibble: 5 x 5
## # Groups: ACSData.statefip [5]
## ACSData.statefip ACSData.povertyst_num count percent NAME
## <fct> <dbl> <dbl> <dbl> <fct>
## 1 alabama 1 1 0.197 alabama
## 2 puerto rico 1 1 0.501 puerto rico
## 3 alaska 1 1 0.0997 alaska
## 4 arizona 1 1 0.131 arizona
## 5 arkansas 1 1 0.174 arkansas
# Convert the grouped tibble into a plain base data frame.
ACSData4 <- ACSData3 %>% as.data.frame()
head(ACSData4, n = 5)
## ACSData.statefip ACSData.povertyst_num count percent NAME
## 1 alabama 1 1 0.19681177 alabama
## 2 puerto rico 1 1 0.50108932 puerto rico
## 3 alaska 1 1 0.09967846 alaska
## 4 arizona 1 1 0.13064323 arizona
## 5 arkansas 1 1 0.17434508 arkansas
# Store the state names as plain character strings (NAME is a factor) so
# string functions can be applied to them.
ACSData4[["NAME_char"]] <- as.character(ACSData4[["NAME"]])
head(ACSData4[["NAME_char"]], n = 5)
## [1] "alabama" "puerto rico" "alaska" "arizona" "arkansas"
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
# Convert the lower-case state names to title case so they match the NAME
# field of the map data ("New Mexico", "South Dakota", ...).
# Hmisc::capitalize() upper-cases only the first character of the whole string
# ("Puerto rico", "New york"), so every multi-word state failed the later
# merge on NAME and disappeared from the map. tools::toTitleCase() capitalizes
# each word and keeps small words such as "of" in lower case
# ("District of Columbia").
ACSData4$NAME_char <- tools::toTitleCase(ACSData4$NAME_char)
head(ACSData4, 5)
## ACSData.statefip ACSData.povertyst_num count percent NAME
## 1 alabama 1 1 0.19681177 alabama
## 2 puerto rico 1 1 0.50108932 puerto rico
## 3 alaska 1 1 0.09967846 alaska
## 4 arizona 1 1 0.13064323 arizona
## 5 arkansas 1 1 0.17434508 arkansas
## NAME_char
## 1 Alabama
## 2 Puerto rico
## 3 Alaska
## 4 Arizona
## 5 Arkansas
# Overwrite NAME with the re-cased state names, stored as a factor.
ACSData4[["NAME"]] <- as.factor(ACSData4[["NAME_char"]])
head(ACSData4, n = 5)
## ACSData.statefip ACSData.povertyst_num count percent NAME
## 1 alabama 1 1 0.19681177 Alabama
## 2 puerto rico 1 1 0.50108932 Puerto rico
## 3 alaska 1 1 0.09967846 Alaska
## 4 arizona 1 1 0.13064323 Arizona
## 5 arkansas 1 1 0.17434508 Arkansas
## NAME_char
## 1 Alabama
## 2 Puerto rico
## 3 Alaska
## 4 Arizona
## 5 Arkansas
# Expose `percent` under the more descriptive name poverty_rate, which is the
# column mapped to the fill colour in the plots.
ACSData4[["poverty_rate"]] <- ACSData4[["percent"]]
head(ACSData4, n = 5)
## ACSData.statefip ACSData.povertyst_num count percent NAME
## 1 alabama 1 1 0.19681177 Alabama
## 2 puerto rico 1 1 0.50108932 Puerto rico
## 3 alaska 1 1 0.09967846 Alaska
## 4 arizona 1 1 0.13064323 Arizona
## 5 arkansas 1 1 0.17434508 Arkansas
## NAME_char poverty_rate
## 1 Alabama 0.19681177
## 2 Puerto rico 0.50108932
## 3 Alaska 0.09967846
## 4 Arizona 0.13064323
## 5 Arkansas 0.17434508
# URL of the GeoJSON file with the state boundaries.
linkMap <- "https://github.com/G7-PUBPOL-543/maps/raw/main/states.geojson"
library(sf)
## Linking to GEOS 3.8.1, GDAL 3.1.4, PROJ 6.3.1
# Read the state boundaries into an sf object and preview its first rows.
mapUS <- read_sf(linkMap)
head(mapUS)
## Simple feature collection with 6 features and 3 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -124.7318 ymin: 30.35893 xmax: -80.83973 ymax: 49.00236
## geographic CRS: NAD83
## # A tibble: 6 x 4
## STATEFP STUSPS NAME geometry
## <chr> <chr> <chr> <MULTIPOLYGON [°]>
## 1 31 NE Nebraska (((-104.053 43.00059, -103.6183 43.00068, -103.133 4…
## 2 53 WA Washingt… (((-122.526 47.35891, -122.514 47.4489, -122.46 47.4…
## 3 35 NM New Mexi… (((-109.0452 36.99908, -108.646 36.99926, -108.2494 …
## 4 46 SD South Da… (((-104.0577 44.99743, -104.0397 45.00133, -104.0401…
## 5 21 KY Kentucky (((-89.13268 36.9822, -89.16645 37.00334, -89.18251 …
## 6 13 GA Georgia (((-85.60516 34.98468, -85.38497 34.98299, -84.93931…
# Inspect the structure and column types of ACSData4.
str(ACSData4)
## 'data.frame': 52 obs. of 7 variables:
## $ ACSData.statefip : Factor w/ 62 levels "alabama","alaska",..: 1 60 2 3 4 5 6 7 8 9 ...
## $ ACSData.povertyst_num: num 1 1 1 1 1 1 1 0 1 1 ...
## $ count : num 1 1 1 1 1 1 1 1 1 1 ...
## $ percent : num 0.1968 0.5011 0.0997 0.1306 0.1743 ...
## $ NAME : Factor w/ 52 levels "Alabama","Alaska",..: 1 40 2 3 4 5 6 7 8 9 ...
## $ NAME_char : chr "Alabama" "Puerto rico" "Alaska" "Arizona" ...
## $ poverty_rate : num 0.1968 0.5011 0.0997 0.1306 0.1743 ...
# Join the state polygons with the state-level poverty table on NAME.
# merge() defaults to an inner join, so only names found in both inputs remain.
mapUSVars <- merge(x = mapUS, y = ACSData4, by = "NAME")
mapUSVars
## Simple feature collection with 40 features and 9 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -170 ymin: 18.92245 xmax: -66.97626 ymax: 71.35256
## geographic CRS: NAD83
## First 10 features:
## NAME STATEFP STUSPS ACSData.statefip ACSData.povertyst_num count
## 1 Alabama 01 AL alabama 1 1
## 2 Alaska 02 AK alaska 1 1
## 3 Arizona 04 AZ arizona 1 1
## 4 Arkansas 05 AR arkansas 1 1
## 5 California 06 CA california 1 1
## 6 Colorado 08 CO colorado 1 1
## 7 Connecticut 09 CT connecticut 0 1
## 8 Delaware 10 DE delaware 1 1
## 9 Florida 12 FL florida 1 1
## 10 Georgia 13 GA georgia 1 1
## percent NAME_char poverty_rate geometry
## 1 0.19681177 Alabama 0.19681177 MULTIPOLYGON (((-88.3273 30...
## 2 0.09967846 Alaska 0.09967846 MULTIPOLYGON (((-154.1567 5...
## 3 0.13064323 Arizona 0.13064323 MULTIPOLYGON (((-114.7196 3...
## 4 0.17434508 Arkansas 0.17434508 MULTIPOLYGON (((-89.7331 36...
## 5 0.14502554 California 0.14502554 MULTIPOLYGON (((-118.6044 3...
## 6 0.12091503 Colorado 0.12091503 MULTIPOLYGON (((-109.0501 4...
## 7 0.11671687 Connecticut 0.11671687 MULTIPOLYGON (((-73.48731 4...
## 8 0.16358839 Delaware 0.16358839 MULTIPOLYGON (((-75.7886 39...
## 9 0.13959053 Florida 0.13959053 MULTIPOLYGON (((-80.84849 2...
## 10 0.17073805 Georgia 0.17073805 MULTIPOLYGON (((-85.60516 3...
library(ggplot2)
# Base layer: every polygon in mapUS drawn in light grey without borders.
base <- ggplot(data = mapUS) +
  geom_sf(fill = "grey90", color = NA) +
  theme_classic()
base  # render the base map of the U.S.
# Overlay the merged states on the base map, filling each one by poverty_rate.
povMap <- base +
  geom_sf(
    data = mapUSVars,
    mapping = aes(fill = poverty_rate),
    color = NA
  )
povMap
Change the color scale
# Use a continuous fill gradient running from blue (low) to yellow (high).
povMap1 <- povMap +
  scale_fill_gradient(low = "blue", high = "yellow")
Add titles
# Collect all plot texts in one list; NULL entries mean "no text" for that slot.
TitlePovMap <- list(
  Title = " Poverty Rate Map by State in the United States",
  SubTi = NULL,
  XTi = NULL,
  YTi = NULL,
  Sou = "Source: 2019 American Community Survey (ACS) Data"
)
# Attach the title, subtitle, axis labels and caption stored in TitlePovMap.
povMap2 <- povMap1 +
  labs(
    title = TitlePovMap[["Title"]],
    subtitle = TitlePovMap[["SubTi"]],
    x = TitlePovMap[["XTi"]],
    y = TitlePovMap[["YTi"]],
    caption = TitlePovMap[["Sou"]]
  )
povMap2
Adjust the title and caption positions
# Center the title (hjust = 0.5) and right-align the caption (hjust = 1).
povMap3 <- povMap2 +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 1)
  )
povMap3